
*******************************************************************************
* Calculate average characteristics of homogeneous relatives (siblings and cousins)
* for each index worker. See the definition of homogeneous groups in the main do-file.
*******************************************************************************

* Build a lookup file holding one row per PersonId found in the coworker
* data. It is used below to restrict the relatives data to index workers
* that are in the sample, which keeps the later merges small.
use PersonId using Data/data_coworkers, clear
duplicates drop
save Data/tmp_data_relatives_X, replace

* Load the relatives data and keep only rows whose PersonId (the index
* worker) also appears in the coworker id list saved above.
use Data/data_relatives, clear
merge m:1 PersonId using Data/tmp_data_relatives_X, keep(match) nogenerate

* Attach 2010 voting and individual characteristics to each relative.
* The identifier is switched so that PersonId refers to the relative while
* the index worker is kept in IndexWorkerId.
rename PersonId IndexWorkerId
rename Relative PersonId

* Voting records: keep every relative row (matched or not) and discard
* records that exist only in the voting file.
merge m:1 PersonId using Data/data_voting_2010, keep(master match) nogenerate

* Individual characteristics in 2010: same rule as above.
merge m:1 PersonId using Data/data_ind_X_2010, keep(master match) nogenerate

* Remove variables that are not needed further on.
drop P0846_lopnr_PeOrgNr WorkplaceId AstKommun Forsamling
compress

* Relatives without a 2010 voting outcome (not eligible to vote in 2010)
* cannot be used to construct the instrument, so they are removed here.
keep if Voted2010 != .

* Prefix every relative characteristic (the variables stored from Voted2010
* through NumberOfChildren) with "Relative_" so they stay distinct from the
* index worker's own variables that are merged in afterwards. These are the
* variables later collapsed to mean values by index worker.
unab relative_chars : Voted2010-NumberOfChildren
foreach char of local relative_chars {
    rename `char' Relative_`char'
}
 
 
* Bring in the index worker's own characteristics so each relative can be
* compared with the index worker when defining peer groups.
* PersonId goes back to identifying the index worker; the relative's id is
* no longer needed.
drop PersonId
rename IndexWorkerId PersonId

* Keep only rows where the index worker is found in the 2017 individual data.
merge m:1 PersonId using Data/data_ind_X_2017, keep(match) nogenerate

* Drop index worker variables that are not used further on.
drop num_of_workers deso Occ1 ///
    WP_Org_Sni2007 WP_SNI_Section WP_P0846_lopnr_PeOrgNr WorkplaceId ///
    birth_year birth_month birth_day ///
    AstKommun Ssyk4_2012_J16 Varldsdelnamn FodelselandGrp_egen Foreign ///
    Age2018 Age2022 ///
    Org_* Ftg_*
compress
save Data/tmp_alt_instrument_r51, replace

use Data/tmp_alt_instrument_r51, clear
*******************************************************************************
* Below are the only differences from data_relatives_X. The differences are
* that we do not keep the homogeneous family peers, but rather the complement,
* and that we also add restrictions on age and income differences.
*******************************************************************************

* Complement of the homogeneous group: keep relatives that differ from the
* index worker in college education or in sex.
keep if Relative_EduCollege != EduCollege | Relative_Female != Female

* Keep relatives whose age differs from the index worker's by more than 5 years.
* Stata treats missing values as larger than any number, so a missing age_diff
* would satisfy "age_diff > 5"; missing gaps are therefore excluded explicitly.
gen age_diff = abs(Relative_Age2010 - Age2010)
keep if age_diff > 5 & !missing(age_diff)

* Keep relatives with an above-median income gap and drop extreme outliers
* (1% of sample). The two cut-offs are hard-coded.
* NOTE(review): presumably 2064 is the median and 10107 the 99th percentile of
* income_diff from an earlier run - confirm they still match the data.
* Rows with a missing income_diff fail the upper bound and are dropped.
gen income_diff = abs(Relative_Income - Income)
keep if inrange(income_diff, 2064, 10107)

* Collapse to one row per index worker: the mean of every relative
* characteristic plus the number of relatives that entered the mean.
* PersonId is the by-variable, so a copy of it is used as the count target.
gen PersonId2 = PersonId
collapse (mean) Relative_Voted2010-Relative_NumberOfChildren ///
    (count) NumberOfRelatives = PersonId2, by(PersonId)

save Data/data_relatives_X_alt_instrument, replace








